EG started this on 20160403 how does the noise effect the loglikelihood Weibull model->theta is a scale, gamma is a shape parameter >0 Gompertz Model-> beta(G) is a scale, alpha(R) is a shape parameter>0
difference function of loglikelihood function of gompertz and weibull p.d.fs test if L(Weibull,X)>L(Gompertz,X) for parameters Weibull model->theta is a scale, gamma is a shape parameter >0 Gompertz Model-> beta is a scale, alpha is a shape parameter>0
For additive Gaussian noise e ~ N (0, sigma^2) with known variance sigma^2 sd of gaussian noise function max sd would be = 3*mean(inverse.gomp.CDF) min sd would be mean(inverse.gomp.CDF)
require(flexsurv)
## Loading required package: flexsurv
## Loading required package: survival
require(gplots)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
gamma=0.01
theta=0.025
#test G and R in nested for loops
R=0.001
G=0.2
#R should be in [0, 0.05], and G should be [0.05, 0.3].
N=100 # population size
#rgompertz(alpha,beta,N) gives random Gompertz numbers from inverse CDF of Gompertz
#where alpha and beta are 2 parameters, N is number of population
#generate gompertz random numbers by using inverse CDF
#generate random number with a given distribution of Gompertz
#prediction
rgompertz = function(R,G, N){
x.uniform = runif(N)
#inverse of Gompertz CDF
inverse.gomp.CDF = function(R,G,y) { (1/G)*log(1 - (G/R)*log(1-y) ) }
x.gompertz = inverse.gomp.CDF(R,G, x.uniform)
return(x.gompertz)
}
rweibull= function(theta,gamma,N)
{
x.uniform= runif(n)
inverse.wei.CDF=function(theta,gamma,y) { theta*(-log(1-y))^(1/gamma)}
x.weibull=inverse.wei.CDF(theta,gamma,x.uniform)
return(x.weibull)
}
#create a function that calculates noise of lifespan
calculate.noise = function(i){
#lifespan
gaussian<-rnorm(N, mean = 0, sd=i)
#observation
#X<- gompertz.random +gaussian to be used in simulation later.
#noise
noise=sd(gaussian)
return(noise)
}
#generate gompertz random numbers (lifespan)
#prediction
gompertz.random<-rgompertz(R,G,N)
average.lifespan=mean(gompertz.random)
#put the variable values from loops into lists
sderr<- list()
Delta_LL<-list()
G<-list()
R<-list()
LWei<-list()
LGomp<-list()
MeanLF<-list()
sdLS<-list()
Delta_LL.flex<-list()
LWei.flex<-list()
LGomp.flex<-list()
G.flex.estimated<-list()
R.flex.estimated<-list()
LLG.par<-list()
LLR.par<-list()
gamma.flex.estimated<-list()
theta.flex.estimated<-list()
for (R_in in c(1E-3, 0.002, 0.005,0.008, 0.01,0.03, 0.05)){ #fix alpha or in other words R shape parameter
for (G_in in c(0.05,0.08, 0.1,0.15,0.17, 0.2, 0.25)){
#for (sd in seq(round.lifespan,3*round.lifespan,by=1)){
for (i in c(0, 0.5, 1, 2,3,4, 5)){
#for (i in c(0)){
#generate gompertz random numbers (lifespan)
#prediction
gompertz.random<-rgompertz(R_in,G_in,N)
average.lifespan=mean(gompertz.random)
#store average.lifespan into MeanLF list
MeanLF[[length(MeanLF)+1]]=average.lifespan
#check the sd by using calculate.noise() function
sd.gaussian=calculate.noise(i)
#generate gaussion random numbers
gaussian<-rnorm(N, mean = 2*average.lifespan, sd=i)
#standard deviation of gompertz.random
sd.lifespan=sd(gompertz.random)
#store sd of lifespan into SdLS list
sdLS[[length(sdLS)+1]] =sd.lifespan
#add gaussian random numbers to gompertz random numbers
lifespan<- gompertz.random +gaussian
#Log likelihood function for the Weibull model
weib.likl<-function(param,y){
theta<-exp(param[1]) #take exponential to avoid NaNs when taking log(theta)
gamma<-exp(param[2]) # avoid NaNs when taking log(gamma)
delta=1; # delta is 1 for right censored data which is our case; lifespan>0
y=lifespan[!is.na(lifespan)]
logl<-sum(delta*(log(gamma) + gamma*log(theta) + (gamma-1)*log(y) -
(theta*y)^gamma )) -sum((1-delta)*(theta*y)^gamma)
return(-logl)
}
# take log(param) since you take exponential above to avoid NaN values above
weib=optim(log(c(0.001,0.001)),weib.likl,y=lifespan)
weib$value
LWei[[length(LWei)+1]] = weib$value
gomp.likl <- function (param,y){
G_in<-param[1]
R_in<-param[2]
delta=1
y=lifespan[!is.na(lifespan)]
logl<-sum(delta*(log(G_in)+R_in*y+(-(G_in/R_in)*(exp(R_in*y)-1)))) +
sum((1-delta)*(-(G_in/R_in)*(exp(R_in*y)-1)))
return(-logl)
}
gomp<-optim(c(0.01,0.01),gomp.likl,y=lifespan)
#c(0.01,0.01) (best one)
#c(0.025,0.03)
#R should be in [0, 0.05], and G should be [0.05, 0.3].
gomp$value
#store loglikelihood values of gompertz optimized results into LGomp list
LGomp[[length(LGomp)+1]] = gomp$value
# store R and G estimation from optim of likl functions in Gompertz
LLG.par[[length(LLG.par)+1]] =gomp$par[1]
LLR.par[[length(LLR.par)+1]]=gomp$par[2]
delta.likelihood.wei<- -(weib$value-gomp$value)
#calculate LL and noise change
sderr[[length(sderr)+1]] = i
Delta_LL[[length(Delta_LL)+1]] = delta.likelihood.wei
G[[length(G)+1]]=G_in
#switch to alpha.seq when for fixed beta
R[[length(R)+1]]=R_in
#todo use flexsurv to calculate the LL
#flexsurv only works with positive variables.
#fix gaussian std to 0
gaussian.flex= rnorm(N, mean = 2*average.lifespan, sd=0)
X.flex= gompertz.random +gaussian.flex
fitGomp = flexsurvreg(formula = Surv(X.flex) ~ 1, dist="gompertz")
fitWei = flexsurvreg(formula = Surv(X.flex) ~ 1, dist="weibull")
LWei.flex[[length(LWei.flex)+1]]=fitWei$loglik
LGomp.flex[[length(LGomp.flex)+1]]=fitGomp$loglik
param.Gomp<-fitGomp$res; R.flex<-param.Gomp[2]; G.flex<-param.Gomp[1];
R.flex.estimated[[length(R.flex.estimated)+1]]<-R.flex
G.flex.estimated[[length(G.flex.estimated)+1]]<-G.flex
param.Wei<-fitWei$res; gamma.flex<-param.Wei[1]; theta.flex<-param.Wei[2];
gamma.flex.estimated[[length(gamma.flex.estimated)+1]]<-gamma.flex;
theta.flex.estimated[[length(theta.flex.estimated)+1]]<-theta.flex
delta_flexsurv=fitWei$loglik-fitGomp$loglik
#fitWei$loglik
Delta_LL.flex[[length(Delta_LL.flex)+1]]=delta_flexsurv
}
}
}
#semi-log plot for Gompertz mortality rate
m.gomp = R_in * exp( G_in * lifespan )
log_m.gomp = log(R_in) + G_in * lifespan;
#pdf(paste("plots/","Gompertz.semi.log.plot.batch.pdf", sep=''), width=5, height=5)
plot(log_m.gomp ,lifespan)
#dev.off()
#log-log plot for Weibull mortality rate
m.wei = theta * (lifespan^gamma)
log_m.wei =log(theta)+ gamma*log(lifespan)
#pdf(paste("plots/","Weibull.log.log.plot.batch.pdf", sep=''), width=5, height=5)
plot(log_m.wei,lifespan)
#dev.off()
#put everything into a data frame
results = data.frame(cbind(sderr), cbind(R),cbind(LLR.par),cbind(R.flex.estimated),cbind(G),cbind(LLG.par),cbind(G.flex.estimated),cbind(Delta_LL) , cbind(Delta_LL.flex),
cbind(LWei),cbind(LWei.flex), cbind(LGomp),cbind(LGomp.flex), cbind(MeanLF), cbind(sdLS))
results_mat<-as.matrix(results)
write.csv(results_mat,file="Results.csv")
#write.csv(results_mat,file="noise_zero.csv")
dLL<-unlist(results$Delta_LL )
dLL.flex<-unlist(results$Delta_LL.flex)
LLGomp<-unlist(results$LGomp)
LLGomp.flex<- unlist(results$LGomp.flex)
LLWei<- unlist(results$LWei)
LLWei.flex<- unlist(results$LWei.flex)
simulated.G<-unlist(results$G)
estimated.G.flex<-unlist(results$G.flex.estimated)
simulated.R<-unlist(results$R)
estimated.R.flex<-unlist(results$R.flex.estimated)
estimatedLL.G<-unlist(results$LLG.par)
estimatedLL.R<-unlist(results$LLR.par)
#LLGomp.flex
#delta.output<-data.frame(dLL,dLL.flex,LLGomp,LLGomp.flex,LLWei,LLWei.flex)
summary( lm( dLL~ dLL.flex))
##
## Call:
## lm(formula = dLL ~ dLL.flex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.3839 -0.8970 -0.4044 0.4965 13.9418
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0891 0.1292 8.429 9.93e-16 ***
## dLL.flex 0.8342 0.0307 27.169 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.733 on 341 degrees of freedom
## Multiple R-squared: 0.684, Adjusted R-squared: 0.6831
## F-statistic: 738.1 on 1 and 341 DF, p-value: < 2.2e-16
summary( lm( LLGomp ~ LLGomp.flex))
##
## Call:
## lm(formula = LLGomp ~ LLGomp.flex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.806 -9.520 -2.193 5.662 48.840
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 51.62891 4.57818 11.28 <2e-16 ***
## LLGomp.flex -0.87663 0.01328 -66.01 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.11 on 341 degrees of freedom
## Multiple R-squared: 0.9274, Adjusted R-squared: 0.9272
## F-statistic: 4357 on 1 and 341 DF, p-value: < 2.2e-16
summary( lm( LLWei ~ LLWei.flex))
##
## Call:
## lm(formula = LLWei ~ LLWei.flex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.332 -8.747 -1.471 5.273 53.011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 47.88852 4.31140 11.11 <2e-16 ***
## LLWei.flex -0.88483 0.01261 -70.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.48 on 341 degrees of freedom
## Multiple R-squared: 0.9352, Adjusted R-squared: 0.935
## F-statistic: 4922 on 1 and 341 DF, p-value: < 2.2e-16
summary(lm(LLWei~LLGomp))
##
## Call:
## lm(formula = LLWei ~ LLGomp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.5083 -1.5240 0.6462 2.3225 5.1286
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.928802 1.312902 -2.992 0.00297 **
## LLGomp 1.001192 0.003713 269.643 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.082 on 341 degrees of freedom
## Multiple R-squared: 0.9953, Adjusted R-squared: 0.9953
## F-statistic: 7.271e+04 on 1 and 341 DF, p-value: < 2.2e-16
summary(lm(LLWei.flex~LLGomp.flex))
##
## Call:
## lm(formula = LLWei.flex ~ LLGomp.flex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6075 -2.2037 -0.9279 1.3770 13.2899
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.704828 1.153515 1.478 0.14
## LLGomp.flex 0.996489 0.003346 297.814 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.051 on 341 degrees of freedom
## Multiple R-squared: 0.9962, Adjusted R-squared: 0.9962
## F-statistic: 8.869e+04 on 1 and 341 DF, p-value: < 2.2e-16
summary(lm(simulated.G~estimated.G.flex))
##
## Call:
## lm(formula = simulated.G ~ estimated.G.flex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.06944 -0.01075 0.00280 0.01545 0.05305
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.004476 0.003167 1.413 0.158
## estimated.G.flex 0.807968 0.016922 47.747 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.02366 on 341 degrees of freedom
## Multiple R-squared: 0.8699, Adjusted R-squared: 0.8695
## F-statistic: 2280 on 1 and 341 DF, p-value: < 2.2e-16
summary(lm(simulated.R~estimated.R.flex))
##
## Call:
## lm(formula = simulated.R ~ estimated.R.flex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.059156 -0.006701 -0.003105 0.000874 0.036855
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.687e-03 6.577e-04 13.21 <2e-16 ***
## estimated.R.flex 3.080e+01 1.405e+00 21.92 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01089 on 341 degrees of freedom
## Multiple R-squared: 0.585, Adjusted R-squared: 0.5837
## F-statistic: 480.6 on 1 and 341 DF, p-value: < 2.2e-16
summary(lm(simulated.R~estimatedLL.R))
##
## Call:
## lm(formula = simulated.R ~ estimatedLL.R)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.025214 -0.011032 -0.005949 0.010761 0.041083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.003589 0.002260 1.588 0.113
## estimatedLL.R 0.076591 0.013815 5.544 5.93e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01619 on 341 degrees of freedom
## Multiple R-squared: 0.08268, Adjusted R-squared: 0.07999
## F-statistic: 30.74 on 1 and 341 DF, p-value: 5.932e-08
summary(lm(simulated.G~estimatedLL.G))
##
## Call:
## lm(formula = simulated.G ~ estimatedLL.G)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.100379 -0.062236 0.007733 0.057286 0.107773
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.142227 0.003908 36.398 <2e-16 ***
## estimatedLL.G 1.586726 4.163375 0.381 0.703
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06558 on 341 degrees of freedom
## Multiple R-squared: 0.0004258, Adjusted R-squared: -0.002506
## F-statistic: 0.1452 on 1 and 341 DF, p-value: 0.7034
summary(lm(estimatedLL.G~estimated.G.flex))
##
## Call:
## lm(formula = estimatedLL.G ~ estimated.G.flex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0007191 -0.0004078 -0.0002239 -0.0000614 0.0054917
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.026e-05 1.116e-04 -0.092 0.927
## estimated.G.flex 2.377e-03 5.963e-04 3.986 8.21e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0008338 on 341 degrees of freedom
## Multiple R-squared: 0.04452, Adjusted R-squared: 0.04172
## F-statistic: 15.89 on 1 and 341 DF, p-value: 8.213e-05
summary(lm(estimatedLL.R~estimated.R.flex))
##
## Call:
## lm(formula = estimatedLL.R ~ estimated.R.flex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.104934 -0.050447 -0.003194 0.038923 0.222460
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.151199 0.003832 39.453 <2e-16 ***
## estimated.R.flex -1.679010 8.187604 -0.205 0.838
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06346 on 341 degrees of freedom
## Multiple R-squared: 0.0001233, Adjusted R-squared: -0.002809
## F-statistic: 0.04205 on 1 and 341 DF, p-value: 0.8376
#pdf(paste("plots/","simulated.G.vs.estimated.G.flex.batch.pdf", sep=''), width=5, height=5)
plot(simulated.G,estimated.G.flex)
#dev.off()
#pdf(paste("plots/","simulated.R.vs.estimated.R.flex.batch.pdf", sep=''), width=5, height=5)
plot(simulated.R,estimated.R.flex)
#dev.off()
#pdf(paste("plots/","estimatedLL.G.vs.estimated.G.flex.batch.pdf", sep=''), width=5, height=5)
plot(estimatedLL.G,estimated.G.flex)
#dev.off()
#pdf(paste("plots/","estimatedLL.R.vs.estimated.R.flex.batch.pdf", sep=''), width=5, height=5)
plot(estimatedLL.R,estimated.R.flex)
#dev.off()
results.sub<-data.frame(cbind(sderr),cbind(R),cbind(G),cbind(Delta_LL))
R.els = unlist( unique(results.sub$R))
colnum = length(R.els)
tmp = unlist( unique(results.sub$sderr))
noise.els = tmp[order(tmp)]
rownum = length(noise.els)
mat = matrix( data=NA, nrow= rownum, ncol=colnum) #noise as row, alpha as columns
rownames(mat) = noise.els
colnames(mat) = R.els
for (k in c(0.05,0.08, 0.1,0.15,0.17, 0.2, 0.25)){
data = results.sub[results.sub[,3]==k, 4]
data<-unlist(data)
heat_mat<-matrix(data,ncol=colnum,nrow=rownum)
#rownames(heat_mat, do.NULL = TRUE, prefix = "row")
rownames(heat_mat) <- c("0","0.5","1","2","3","4","5")
colnames(heat_mat) <- R.els
library(gplots)
hM <- format(round(heat_mat, 2))
data_mat<-scale(heat_mat,scale=TRUE,center=FALSE)
#paste(file = "~/github/model.comparison/plots/heatplot_zero_noise_G",k,".jpeg",sep="")
#jpeg(paste("plots/",k, ".fixed.G.jpg", sep=''))
#paste(“myplot_”, i, “.jpeg”, sep=””)
heatmap.2(data_mat, cellnote=hM,col = cm.colors(256), scale="none", notecol="black", margins=c(5,10),
dendrogram='none', Rowv=FALSE, Colv=FALSE,trace='none',
xlab = "R parameters",
ylab = "noise", main = bquote(paste("R vs. sd dLL at" ~ G==.(k))),par(cex.main=.5),srtCol=315, adjCol = c(0,1),cexRow=0.8,cexCol=0.8)
#dev.off()
}
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
# heatmap for fixed noise
G.els = unlist( unique(results.sub$G))
colnum = length(G.els)
R.els=unlist(unique(results.sub$R))
rownum = length(R.els)
mat = matrix( data=NA, nrow= rownum, ncol=colnum) #noise as row, alpha as columns
rownames(mat) = R.els
colnames(mat) = G.els
for (n in c(0, 0.5, 1,2,3,4,5) ){
#for (n in c(0)){
data = results.sub[results.sub[,1]==n, 4]
data<-unlist(data)
heat_mat<-matrix(data,ncol=colnum,nrow=rownum)
#rownames(heat_mat, do.NULL = TRUE, prefix = "row")
rownames(heat_mat) <- R.els
colnames(heat_mat) <- G.els
library(gplots)
hM <- format(round(heat_mat, 2))
data_mat<-scale(heat_mat,scale=TRUE,center=FALSE)
#paste(file = "~/github/model.comparison/plots/heatplot_zero_noise_G",k,".jpeg",sep="")
#jpeg(paste("plots/",n, ".fixed_noise.jpg", sep=''))
#paste(“myplot_”, i, “.jpeg”, sep=””)
heatmap.2(data_mat, cellnote=hM,col = cm.colors(256), scale="none", notecol="black", margins=c(5,10),
dendrogram='none', Rowv=FALSE, Colv=FALSE,trace='none',
xlab = "R parameters",
ylab = "G parameters", main = bquote(paste("R vs G of dLL at" ~ sd==.(n))),par(cex.main=.5),srtCol=315, adjCol = c(0,1),cexRow=0.8,cexCol=0.8)
#dev.off()
}
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
# heatmap or fixed R
G.els = unlist( unique(results.sub$G))
colnum = length(G.els)
tmp = unlist( unique(results.sub$sderr))
noise.els = tmp[order(tmp)]
rownum = length(noise.els)
mat = matrix( data=NA, nrow= rownum, ncol=colnum) #noise as row, alpha as columns
rownames(mat) = noise.els
colnames(mat) = G.els
for (j in c(1E-3, 0.002, 0.005,0.008, 0.01,0.03, 0.05) ){
data = results.sub[results.sub[,2]==j, 4]
data<-unlist(data)
heat_mat<-matrix(data,ncol=colnum,nrow=rownum)
#rownames(heat_mat, do.NULL = TRUE, prefix = "row")
rownames(heat_mat) <- c("0","0.5","1","2","3","4","5")
colnames(heat_mat) <- G.els
library(gplots)
hM <- format(round(heat_mat, 2))
data_mat<-scale(heat_mat,scale=TRUE,center=FALSE)
#paste(file = "~/github/model.comparison/plots/heatplot_zero_noise_G",k,".jpeg",sep="")
#jpeg(paste("plots/",j, ".fixed_R.jpg", sep=''))
#paste(“myplot_”, i, “.jpeg”, sep=””)
heatmap.2(data_mat, cellnote=hM,col = cm.colors(256), scale="none", notecol="black", margins=c(5,10),
dendrogram='none', Rowv=FALSE, Colv=FALSE,trace='none',
xlab = "G parameters",
ylab = "noise", main = bquote(paste("G vs. sd of dLL at" ~ R==.(j))),par(cex.main=.5),srtCol=315, adjCol = c(0,1),cexRow=0.8,cexCol=0.8)
#dev.off()
}
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL
## NULL